*Generate random variable
*(seed is fixed, so the draw is reproducible for a given observation order)
set seed 1
gen random = runiformint(0,10000)

*Generate group for each individual and each month by year
sort ind_id edate
egen ind_emy = group(ind_id emy)

*Generate running observation number within each individual-month, by date.
*The date ordering is requested explicitly in the by-prefix: a plain
*"bysort ind_emy:" sorts on ind_emy only, and Stata's sort does not preserve
*the previous order of tied observations, so an earlier "sort ind_id edate"
*does not guarantee that the numbering follows edate within the group.
bysort ind_emy (edate): gen ind_emy_seq = _n

*ID for each month-by-year
sort emy
egen emy_id = group(emy)

*Re-base the price level to January 2017
*(Original data: Nov 2011 prices as price base)
*For every individual, vnv_base is set to the largest value of vnv observed
*in 2017m1 and is then constant across all of that individual's rows.
*NOTE(review): an individual with no observation in 2017m1 gets a missing
*vnv_base, which makes anything multiplied by vnv_base missing - confirm
*that this is intended.
drop vnv_base
bysort ind_id: egen vnv_base = max(cond(emy == tm(2017m1), vnv, .))

*Inflation adjustment: rescale each listed variable by vnv_base/vnv and
*overwrite the original variable with the adjusted value.
*The adjusted value is first built in a scratch variable (suffix _fast) and
*then copied back, so the stored result goes through the default storage type.
foreach v in ///
	total_income reg_income irr_income salary ///
	dividend int_inc ///
	sa_bal ca_bal ccard_bal sa_lim ca_lim ccard_lim ///
	cash liquidity ///
	od_amount payday_loan ///
	total_fin_cost drattarv fit kostn2 utvextir ///
	total_exp nec unnec ///
	groc fuel pharm ca alc rmf group ///
	lottery_exp lottery_char_exp gambling_exp gaming ///
	cinema books craft fish ///
	recr recr_area special sa ts swim toys ///
	hi hs transp educ media charities ///
	holiday ///
	benefits inv_ben ub ///
	loan_wo taxes stud_loan lottery_inc lottery_char_inc gambling_inc ins_claim {
	gen `v'_fast = `v'*vnv_base/vnv
	replace `v' = `v'_fast
	drop `v'_fast
}

*Lottery totals: regular plus charity lottery, for income and for spending
gen tlottery_inc = lottery_inc + lottery_char_inc
gen tlottery_exp = lottery_exp + lottery_char_exp

*Expenditure supercategories
*(plain sums: a missing component makes the whole sum missing)
gen dur = hi + ca + transp
gen nondur = groc + fuel + rmf + pharm + recr + charities + sa
gen tempt = alc + tlottery_exp + gambling_exp + gaming

*Exclusive spending:
*dur_ex equals dur on rows where nondur is exactly zero, and 0 otherwise;
*nondur_ex equals nondur on rows where dur is exactly zero, and 0 otherwise.
gen dur_ex = cond(nondur == 0, dur, 0)

gen nondur_ex = cond(dur == 0, nondur, 0)

*Windfall amount and dummy (1 when the amount is positive and not missing,
*0 in every other case, including a missing amount)
gen windfall = tlottery_inc + gambling_inc + loan_wo + stud_loan + ins_claim + taxes
gen windfall_dum = (windfall > 0 & windfall != .)

*Weekend dummy: 1 when day_of_week is 0 or 6, else 0
*(presumably 0 = Sunday and 6 = Saturday as in Stata's dow() - confirm)
gen weekend = inlist(day_of_week, 0, 6)

*Second half of month dummy: 1 from the 16th observation of the
*individual-month onwards, 0 up to the 15th, missing if the counter is missing
gen second_half_month = (ind_emy_seq > 15) if ind_emy_seq != .

*Pay dummies: 1 for a positive amount, 0 for an amount of exactly zero,
*missing when the amount is negative or missing
gen pay_day = (reg_income > 0) if reg_income >= 0 & reg_income != .
gen irr_income_day = (irr_income > 0) if irr_income >= 0 & irr_income != .
gen income_day = (total_income > 0) if total_income >= 0 & total_income != .
gen salary_day = (salary > 0) if salary >= 0 & salary != .

*Running observation counters:
* hh_id_obs    - numbers the rows within each household
* hh_edate_obs - numbers the rows within each hh_edate group, only for rows
*                with alwaysin2memberhh == 1 (missing on all other rows)
by hh_id, sort: egen hh_id_obs = seq()
by hh_edate, sort: egen hh_edate_obs = seq() if alwaysin2memberhh ==1

*Winsorize variables at the 1% level, upper tail only.
*The cut-off is computed on the non-zero observations; rows where the
*original variable is exactly zero are then set to zero in the w1_ copy.
foreach v in total_exp nec unnec total_income reg_income irr_income salary {
	winsor `v' if `v' != 0, gen(w1_`v') p(0.01) highonly
	replace w1_`v' = 0 if `v' == 0
}

********************************************************************************
*Normalized cash and overdraft
*(Note: Individual cash and overdraft variables already defined)

*Household-level totals per hh_edate group, computed only on rows with
*alwaysin2memberhh == 1 (the _HH variables are missing on all other rows)
foreach v in cash od_amount total_exp total_income {
	bysort hh_edate: egen `v'_HH = sum(`v') if alwaysin2memberhh==1
}

*Average daily expenditures: per individual, and per household
bysort ind_id: egen mean_total_exp = mean(total_exp)
bysort hh_id: egen mean_total_exp_HH = mean(total_exp_HH) if alwaysin2memberhh==1

*Cash, liquidity and overdraft expressed relative to average daily expenditures
*(a zero average yields a missing ratio)
gen cons1 = cash / mean_total_exp
gen cons2 = liquidity / mean_total_exp
gen od_exp = od_amount / mean_total_exp
gen cons1_HH = cash_HH / mean_total_exp_HH
gen od_exp_HH = od_amount_HH / mean_total_exp_HH

********************************************************************************
*Cash and liquidity terciles

***Copies of the variables with zeroes set to missing (prefix p_)
foreach v in ///
	w1_total_income w1_reg_income w1_irr_income w1_salary ///
	total_fin_cost sa_bal ca_bal ccard_bal ///
	utvextir od_amount ///
	payday_loan ///
	cash liquidity ///
	cons1 cons2 od_exp ///
	drattarv fit kostn2 ///
	w1_total_exp w1_nec w1_unnec {
	gen p_`v' = `v' if `v' != 0
}

*Individual x day level
foreach v in cons1 cons2 {
	// the two tercile boundaries, computed over all observations
	pctile cut_`v' = `v', nq(3)
	// assign each observation to category 1, 2 or 3 using those boundaries
	xtile `v'_terc = `v', cutpoints(cut_`v')
	drop cut_`v'
}

*Individual level: median tercile over all of the individual's observations
foreach v in cons1 cons2 {
	bysort ind_id: egen median_`v'_terc = median(`v'_terc)
}
********************************************************************************
*Income quartile

*Individual x day level
*The three quartile boundaries (25th, 50th and 75th percentiles) are computed
*on observations with non-zero total income; each of those observations is
*then assigned to one of four categories. Zero-income rows stay missing here.
pctile cut_total_income = total_income if total_income!=0, nq(4)
xtile total_income_quart = total_income if total_income!=0, cutpoints(cut_total_income)
drop cut_total_income

*Individual level: median quartile over the individual's observations
bysort ind_id: egen median_total_income_quart = median(total_income_quart)

*Individuals whose maximum total income is zero are put in quartile 1
*NOTE(review): this replace runs after median_total_income_quart has been
*computed, so the median stays missing for these individuals - confirm that
*this ordering is intended.
bysort ind_id: egen max_total_income = max(total_income)
replace total_income_quart = 1 if max_total_income==0

********************************************************************************
*Person types
*Pattern used throughout: a row-level dummy (_dum, _win) and an individual
*flag (_pers, _winner) equal to the maximum of that dummy over the individual.

*Gambling / lottery: dummy is 1 if either spending or income is positive,
*0 if both are exactly zero, missing otherwise
foreach k in gambling tlottery {
	gen `k'_dum = 0 if `k'_exp == 0 & `k'_inc == 0
	replace `k'_dum = 1 if (`k'_exp > 0 & `k'_exp != .) | (`k'_inc > 0 & `k'_inc != .)
	bysort ind_id: egen `k'_pers = max(`k'_dum)
}

*Single-variable types: dummy is 1 if positive, 0 if exactly zero
foreach k in tempt inv_ben benefits loan_wo alc gaming {
	gen `k'_dum = 0 if `k' == 0
	replace `k'_dum = 1 if `k' > 0 & `k' != .
	bysort ind_id: egen `k'_pers = max(`k'_dum)
}

*Lottery winners: based on lottery income only
gen tlottery_win = 0 if tlottery_inc == 0
replace tlottery_win = 1 if tlottery_inc > 0 & tlottery_inc != .
bysort ind_id: egen tlottery_winner = max(tlottery_win)

*Credit card holders: 1 if the individual's maximum ccard_count is positive,
*0 if that maximum is zero, missing otherwise
bysort ind_id: egen max_ccard_count = max(ccard_count)
gen ccard_pers = (max_ccard_count > 0) if max_ccard_count >= 0 & max_ccard_count != .

********************************************************************************
*Age categories
* 1: below 30    2: 30 to 44    3: 45 to 59    4: 60 and above
*(left missing when age is missing)
generate byte age_cat = cond(age < 30, 1, cond(age < 45, 2, cond(age < 60, 3, 4))) if age != .

********************************************************************************

*Individual-month-year specific monthly income:
*sum of the daily variable over all rows of the individual-month (ind_emy)
foreach v in total_income reg_income salary irr_income {
	bysort ind_emy: egen month_`v' = sum(`v')
}

*Same for household total income, restricted to rows with alwaysin2memberhh == 1
bysort ind_emy: egen month_total_income_HH = sum(total_income_HH) if alwaysin2memberhh==1

********************************************************************************
**Individual-specific average of each variable (suffix _mean)
foreach v in ///
	dur nondur tempt ///
	int_inc dividend kostn2 ///
	groc fuel pharm ///
	month_total_income month_reg_income month_irr_income month_salary ///
	total_income reg_income salary irr_income ///
	sa_bal ca_bal ccard_bal sa_lim ca_lim ccard_lim ///
	drattarv fit utvextir total_fin_cost ///
	od_amount od_exp ///
	cash liquidity ///
	cons1 cons2 ///
	total_exp nec unnec ///
	alc charities gaming educ recr lottery_exp gambling_exp ///
	ca_count sa_count ccard_count {
	bysort ind_id: egen `v'_mean = mean(`v')
}

*Individual-specific average of monthly household income
bysort ind_id: egen month_total_income_HH_mean = mean(month_total_income_HH)

*Limits normalized by the individual's average monthly total income (suffix _imInc)
foreach v in ccard_lim ca_lim {
	gen `v'_imInc = `v' / month_total_income_mean
}


********************************************************************************
***Variables normalized by their individual-specific average (suffix _rel)
*Where the individual average is exactly zero, the ratio is set to 0
*instead of being left missing.
foreach v in ///
	dur nondur tempt ///
	month_total_income month_reg_income month_irr_income month_salary ///
	total_exp nec unnec ///
	groc fuel pharm ///
	total_income reg_income salary irr_income ///
	sa_bal ca_bal ccard_bal sa_lim ca_lim ccard_lim ///
	drattarv fit utvextir kostn2 total_fin_cost od_amount ///
	cash liquidity ///
	cons1 cons2 od_exp ///
	int_inc dividend ///
	ca_count sa_count ccard_count {
	gen `v'_rel = `v' / `v'_mean
	replace `v'_rel = 0 if `v'_mean == 0
}

*************************************************************************
*Variables normalized by individual-specific average daily expenditures
*(suffix _exp; the divisor is mean_total_exp for every variable)
foreach v in ///
	total_income reg_income ///
	irr_income salary ///
	dividend int_inc ///
	sa_bal ca_bal ccard_bal ///
	sa_lim ca_lim ccard_lim ///
	od_amount payday_loan cash liquidity ///
	total_fin_cost ///
	drattarv fit kostn2 utvextir ///
	total_exp nec unnec ///
	dur nondur tempt ///
	dur_ex nondur_ex ///
	groc fuel pharm ca alc rmf group ///
	lottery_exp lottery_char_exp gambling_exp gaming ///
	cinema books craft fish ///
	recr recr_area special sa ts swim toys ///
	hi hs transp educ media charities holiday ///
	benefits inv_ben ub /// benefits
	loan_wo taxes stud_loan lottery_inc lottery_char_inc gambling_inc ins_claim { // windfalls
	gen `v'_exp = `v' / mean_total_exp
}

*************************************************************************
*Daily absolute balances change
*For each variable: _dchange is the difference to the previous row of the
*same individual, and _rdchange is the same difference for the _rel version.
*Both are missing on an individual's first row.
*NOTE(review): "previous row" is the previous observation in edate order;
*gaps between dates are not checked - confirm the panel has no missing days.

*Order rows by individual and date once; the gen commands below do not
*change the sort order
sort ind_id edate

foreach v in ///
	sa_count ///
	total_income reg_income irr_income salary /// income
	ca_bal sa_bal ccard_bal /// balances
	ca_lim sa_lim ccard_lim /// balances limits
	total_fin_cost drattarv fit kostn2 /// fees etc
	od_amount utvextir /// overdraft
	cash liquidity /// liquidity
	cons1 cons2 od_exp /// relative to average expenditures
	total_exp nec unnec /// expenditure supercategories
	dur nondur tempt { // expenditure supercategories
	gen `v'_dchange = `v' - `v'[_n-1] if ind_id == ind_id[_n-1]
	gen `v'_rdchange = `v'_rel - `v'_rel[_n-1] if ind_id == ind_id[_n-1]
}

*sa_dum: 1 if sa_count rose by at least one since the previous row,
*0 for any smaller change, missing when the change is missing
gen sa_dum = (sa_count_dchange >= 1) if sa_count_dchange != .



